/*******************************************************************************
* Long-term effects of weather-induced migration on urban labor and housing 
  markets
* Busso & Chauvin

* Purpose: Cities Panel 1991-2010

*******************************************************************************/
/*******************************************************************************
	ENVIRONMENT
*******************************************************************************/
* Start from an empty session and raise the variable limit
clear all
set maxvar 32767

/* Data structure */
* Require full variable names, store new variables as doubles by default,
* and fix the random-number seed (a random sort key is drawn in section 1.3)
set varabbrev off
set type double
set seed 123456789

/* Data (general) */
* Census years used (cyrs_housing: years looped over in the housing section)
global cyrs "1991 2000 2010"
global cyrs_housing "1991 2010"

* Globals for Mincerian Regression
* (age_tier and educ2 are the regressors of the wage regression in section 1.2;
*  controls_housing is not referenced in this file -- the rent regression in
*  section 1.3 lists the same variables explicitly)
global age_tier "age2_sh age3_sh age4_sh age5_sh"
global educ2 "dpr ms hi cold"
global controls_housing "l_rooms sh_sewer_access trash"

/* Geographic units */

* Final geographic unit for data aggregation (cities)
global geo mcarp8010

* Auxiliary geographic unit for data aggregation (rural) (i.e., merging)
global geo2 mca8010

* Path of the file with all the geographic units
* (db_inp must be defined before this do-file is run)
global geocodefile "${db_inp}/geo_codes_sat_cities.dta"

/* Check if parmest is installed, install it from SSC if not */
* (only parmest is checked here; rc 111 is the code tested for "not found")
cap which "parmest"
if _rc == 111 {
  ssc install "parmest", replace all
}

/*******************************************************************************
	1.1	MAIN DATA: PROCESSING MAIN CENSUS VARIABLES
*******************************************************************************/
* For each census year: load the harmonized census, attach origin/destination
* geographic codes, build migration, age and education dummies, and store two
* tempfiles: census_with_rural_YY (all areas) and census_general_YY (cities only).
foreach year in $cyrs {
	* Two-digit suffix used in file names (1991 -> 91, 2000 -> 00, 2010 -> 10)
	local y = substr("`year'",3,.)
	
	* Load Census database
	* unzipfile, replace: overwrite a previously extracted copy rather than
	* stopping because the file is already on disk (e.g. when re-running)
	cd "${census_harm}"
	unzipfile Census_`y', replace
	use "Census_`y'", clear

	compress
	
	* Add year of the Census
	gen year = `year'
	
	* Deflator for nominal variables (2010 is the base year, factor 1)
	gen adjfact = .
	replace adjfact = 1 if year == 2010 
	replace adjfact = 1.952133194554 if year == 2000 
	replace adjfact = 0.0085285766539 if year == 1991 
	
	/* General variables */

	* Add geographic units of the municipality of residence (destination, _d)
	merge m:1 muni using "${geocodefile}", nogen ///
	keepusing(${geo} ${geo2}) keep(match master)
	drop muni
	rename $geo ${geo}_d
	rename $geo2 ${geo2}_d
	
	* Add geographic units of the previous municipality (origin, _o)
	rename mgprevmuni muni
	merge m:1 muni using "${geocodefile}", nogen ///
	keepusing(${geo} ${geo2}) keep(match master)
	drop muni
	rename $geo ${geo}_o
	rename $geo2 ${geo2}_o	
	
	* Migrant dummy (less than 10 years in the municipality)
	* NOTE(review): a missing mgyrsmuni evaluates to 0 here because Stata
	* treats missing as larger than any number -- confirm this is intended
	gen migrant = (mgyrsmuni < 10)
		
	* Make dummies for migrants from rural/urban places
	* (code 1000000 identifies the rural area; missing when origin is unknown)
	gen migrant_o_urb = (${geo}_o != 1000000 & migrant == 1) if ${geo}_o != .
	lab var migrant_o_urb "Migrant of urban place of origin."
	gen migrant_o_rur = (${geo}_o == 1000000 & migrant == 1) if ${geo}_o != .
	lab var migrant_o_rur "Migrant of rural place of origin."
	
	* Make dummies for destination of migrants (rural/urban)
	gen migrant_d_urb = (${geo}_d != 1000000 & migrant == 1)
	lab var migrant_d_urb "Migrant who went to urban place."
	gen migrant_d_rur = (${geo}_d == 1000000 & migrant == 1)
	lab var migrant_d_rur "Migrants who went to rural place."
	
	* Make dummy to indicate whether the migrant migrated within the same MCA, CITY
	gen migrant_same = (${geo}_o == ${geo}_d | ${geo2}_o == ${geo2}_d)
		
	* Make dummies for urban-urban and rural-urban migration
	* (set to missing for non-migrants and for moves within the same unit)
	gen migrant_urb_urb = (migrant_o_urb == 1 & migrant_d_urb == 1 & migrant_same != 1)
	gen migrant_rur_urb = (migrant_o_rur == 1 & migrant_d_urb == 1 & migrant_same != 1)
	replace migrant_urb_urb = . if migrant == 0 | migrant_same == 1
	replace migrant_rur_urb = . if migrant == 0 | migrant_same == 1
	
	* Recent migrants (5 or less years in a municipality)
	gen migrant2 = (mgyrsmuni <= 5)
	gen migrant_o_rur2 = (${geo}_o == 1000000 & migrant2 == 1) if ${geo}_o != .
	gen migrant_d_urb2 = (${geo}_d != 1000000 & migrant2 == 1)
	gen migrant_rur_urb2 = (migrant_o_rur2 == 1 & migrant_d_urb2 == 1 & migrant_same != 1)
	replace migrant_rur_urb2 = . if migrant2 == 0 | migrant_same == 1
	
	* Return to original var names for merging
	rename ${geo}_d ${geo} 
	rename ${geo2}_d ${geo2} 
	
	* Keep a copy that still contains rural areas (used in section 2)
	tempfile census_with_rural_`y'
	save `census_with_rural_`y'', replace
	
	* Drop rural areas
	drop if ${geo} == 1000000
	
	/* Basic gendered, age, working age, and urban/rural dummies */
	
	* Age tiers
	gen age1_sh = (age < 15)
	gen age2_sh = (age >= 15 & age <= 24)
	gen age3_sh = (age >= 25 & age <= 39)
	gen age4_sh = (age >= 40 & age <= 54)
	gen age5_sh = (age >= 55)

	/* Basic education dummies (It restricts to working age population) */
	gen nop = (workage == 1 & schcomp == 1) if schcomp != .						/* Dummy for RESIDUALS: school levels: no education */
	gen dpr = (workage == 1 & schcomp == 2) if schcomp != .						/* Dummy for RESIDUALS: primary school */
	gen ms = (workage == 1 & schcomp == 3) if schcomp != . 						/* Dummy for RESIDUALS: middle school */
	gen hi = (workage == 1 & schcomp == 4) if schcomp != . 						/* Dummy for RESIDUALS: high school */
	gen cold = (workage == 1 & schcomp == 5) if schcomp != .					/* Dummy for RESIDUALS: higher education */
	
	gen nohs = (schcomp == 1 | schcomp == 2 | schcomp == 3) if schcomp != . ///
	& workage == 1  															/* Dummy for OUTCOME: less than high school + no educ */
	gen hs = (schcomp == 4 | schcomp == 5) if schcomp != . & workage == 1		/* Dummy for OUTCOME: at least high school */
	
	gen sh_emp_col = (schcomp == 5) if employed == 1 & schcomp != .             /* Dummy for CONTROL: workers with college education */
	
	* Education outcomes restricted to residents (non-migrants), suffix _r
	global varlist "nohs hs"
	foreach var in $varlist {
		gen `var'_r = `var' if migrant == 0
	}
		
	compress
	
	* Cities-only file reused by the labor (1.2) and housing (1.3) sections
	tempfile census_general_`y'
	save `census_general_`y'', replace
	
}
	
/*******************************************************************************
	1.2.	MAIN DATA: LABOR VARIABLES
*******************************************************************************/
* For each census year: build individual labor outcomes from the cities-only
* tempfile, save the individual-level file, then collapse to city averages/sums.
foreach year in $cyrs {
	* Two-digit suffix used in file names
	local y = substr("`year'", 3, .)
	
	* Load Census database (clear is the documented option of -use-)
	use `census_general_`y'', clear

	/* Create labor outcomes */
	* Create wage var
	rename incearn wage

	* Restrict all main variables to workage individuals only
	global vars_null "wage brind unemployed employed informal"
	
	foreach var in $vars_null {
		replace `var' = . if workage == 0 
	}
	
	* Non-positive wages are set to missing, then wages are deflated and logged
	replace wage = . if wage <= 0
	replace wage = wage*adjfact
	gen l_wage = ln(wage)

	* Main industry variables
	gen imf_sh = (brind == 2) if brind != . & employed == 1		/* Employed workers in manufacturing */
	gen igv_sh = (brind == 4) if brind != . & employed == 1		/* Employed workers in public service */
	gen isv_sh = (brind == 3) if brind != . & employed == 1		/* Employed workers in other services */
	
	* Generate employment variables
	gen emp = employed
	
	* Generate informality variables
	gen rate_inform = informal if employed == 1
	gen rate_inform2 = informal2 if employed == 1
	
	* Generate participation variables
	gen rate_part = (employed + unemployed) if workage == 1
	
	* Education-experience adjusted wages (national level):
	* residual of log wage on education and age-tier dummies, re-centered on
	* the (unweighted) sample mean of log wage
	reg l_wage $educ2 $age_tier [pw = wtper], r
	predict l_wres2, resid
	replace l_wres2 = . if l_wage == .
	egen l_wage_mean2 = mean(l_wage)
	gen l_wageres2 = l_wage_mean2 + l_wres2
	drop l_wage_mean2 l_wres2
	
	/* Heterogeneities */
	* By migratory status (_r: non-migrants; _g: recent rural-urban migrants)
	global varlist "l_wageres2 emp rate_part imf_sh isv_sh rate_inform rate_inform2"
	foreach var in $varlist {
		gen `var'_r = `var' if migrant == 0
	}
	global varlist "l_wageres2 emp"
	foreach var in $varlist {
		gen `var'_g = `var' if migrant_rur_urb2 == 1
	}
	
	* By education 
	global varlist "l_wageres2_r emp_r"
	foreach var in $varlist {
		gen `var'_nohs = `var' if nohs == 1 
		gen `var'_hs = `var' if hs == 1
	}

	* By industry
	global varlist "l_wageres2_r emp_r"
	foreach var in $varlist {
		gen `var'_imf = `var' if imf_sh == 1
		gen `var'_isv = `var' if isv_sh == 1
	}
	
	/* Save individual observations before collapsing for other tables */
	save "${db_tmp}/${geo}_pcity_ind_`y'", replace
	
	compress
	
	/* Collapsing at the city level (weighted means of rates, weighted sums of counts) */
	collapse (mean) l_wageres2_* rate_part_* sh_emp_col i*_sh* hs_* rate_inform* ///
	(sum) emp_* [pw = wtper], by($geo) fast
	
	/* Store in a macro all non-log variables that are not shares to generate log versions */
	quietly ds emp_* 
	local nolog_var "`r(varlist)'"
	
	foreach var in `nolog_var' {
		gen l_`var' = ln(`var')
	}
		
	/* Generate a year variable, for merging, and save */
	gen year = `year'
	
	save "${db_tmp}/${geo}_pcity_avg_`y'", replace
}
	
/*******************************************************************************
	1.3.	MAIN DATA: HOUSING VARIABLES
*******************************************************************************/
* For each housing census year: build household-level rent, ownership and
* dwelling-quality variables, save the individual-level file, then collapse to
* the city level.
foreach year in $cyrs_housing {	
	* Two-digit suffix used in file names
	local y = substr("`year'", 3, .)
	
	* Load Census database (clear is the documented option of -use-)
	use `census_general_`y'', clear
	
	* comtime_10 is created as all-missing for 1991 so later code can use it
	if `year' == 1991 gen comtime_10 = .
	
	compress
	gen random = uniform()
	sort random
	gen unique_id = _n
	sort unique_id // this is a random ID, in case sorting affects computations
	
	/* Household variables */
	
	* Recode null values (rooms is kept only on the pid == 1 record)
	replace rooms = . if rooms == 0 | pid != 1

	* House amenities
	replace wallsmat = . if wallsmat > 6
	
	* Unique households (one pid == 1 record per household)
	gen houses = (pid == 1)
	
	* Replace by logs the non dummy variables
	gen l_rooms = ln(rooms)

	/* Rent */	
	* Keep one rent per family
	replace rent = . if pid != 1 | rent >= 990000
	
	* Apply deflator to rent and log scale
	* NOTE(review): l_rent == 0 corresponds to a deflated rent of exactly 1;
	* confirm that dropping those values is intended
	gen l_rent = ln(rent * adjfact)
	replace l_rent = . if l_rent == 0
	
	* Household adjusted rent (national level):
	* residual of log rent on dwelling characteristics, re-centered on the
	* (unweighted) sample mean of log rent
	sort unique_id
	reg l_rent l_rooms sh_sewer_access trash if pid == 1 [pw = wtper], r
	predict l_rres2, resid
	replace l_rres2 = . if rent == . | rooms == . | sh_sewer_access == . | trash == . | pid != 1
	egen l_rent_mean2 = mean(l_rent)
	gen l_rentres2 = l_rent_mean2 + l_rres2
	drop l_rent_mean2 l_rres2
	sort unique_id
	
	/* Ownership */
	gen own_residents = own if (migrant == 0 & pid == 1)
	gen own_all = own if (pid == 1)
	
	/* Housing quality variables */	
	gen brickwall = wallsmat == 1 if !missing(wallsmat)
	gen waternet = watprov == 1	if !missing(watprov)
	drop wallsmat watprov 

	* Flag records where all four quality components are observed
	gen prec_no_miss = sh_sewer_access != . &  trash != .  & brickwall != . &  waternet != .

	* Precarious housing 
	gen no_sewer_access = sh_sewer_access == 0 	if prec_no_miss == 1
	gen no_trash = trash == 0 					if prec_no_miss == 1
	gen no_brickwall = brickwall == 0 			if prec_no_miss == 1 
	gen no_waternet = waternet == 0 			if prec_no_miss == 1

	* Total precarity (number of missing amenities, 0-4)
	egen precarity_count = rowtotal(no_sewer_access no_trash no_brickwall no_waternet)

	* Take care of the missing values (rowtotal returns 0 when all inputs are missing)
	replace precarity_count = . if prec_no_miss != 1

	* Create quality indexes (l: all four lacking; m: one to three; h: none)
	gen hhquality_l = precarity_count == 4 if prec_no_miss == 1
	gen hhquality_m = precarity_count > 0 & precarity_count < 4 if prec_no_miss == 1
	gen hhquality_h = precarity_count == 0 if prec_no_miss == 1
	
	/* Heterogeneities */
	* By quality
	foreach q in "l" "m" "h" {
		gen houses_`q' = (pid == 1 & hhquality_`q' == 1)
		gen rooms_`q' = rooms if (pid == 1 & hhquality_`q' == 1)
		gen l_rentres2_`q' = l_rentres2 if (pid == 1 & hhquality_`q' == 1)	
		gen own_residents_`q' = own_residents if (pid == 1 & migrant == 0 & hhquality_`q' == 1)	
		gen own_all_`q' = own_all if (pid == 1 & hhquality_`q' == 1)	
		gen comtime_10_`q' = comtime_10 if hhquality_`q' == 1
	}
	
	* Outcomes for residents (_r) and recent rural-urban migrants (_g)
	global varlist "l_rentres2 l_rentres2_l l_rentres2_m l_rentres2_h houses houses_l houses_m houses_h rooms rooms_l rooms_m rooms_h"
	foreach var in $varlist {
		gen `var'_r = `var' if migrant == 0
		gen `var'_g = `var' if migrant_rur_urb2 == 1
	}
	
	/* Save individual observations before collapsing for other tables */
	save "${db_tmp}/${geo}_phous_ind_`y'", replace
	
	compress
		
	/* Collapsing at the city level */
	collapse (mean)  l_rentres2* comtime_10* own* (sum) houses* rooms* ///
	[pw = wtper], by(${geo}) fast
	
	/* Store in a macro all non-log variables that are not shares to generate log versions */
	quietly ds rooms* houses*
	local nolog_var "`r(varlist)'"
	
	* Only the log versions of the counts are kept
	foreach var in `nolog_var' {
		gen l_`var' = ln(`var')
		drop `var'
	}
		
	/* Generate a year variable, for merging, and save */
	gen year = `year'
	
	save "${db_tmp}/${geo}_phous_avg_`y'", replace

}

/*******************************************************************************
	2.	EMIGRATION EFFECT 
*******************************************************************************/
* For each census year: take the cities that received rural-urban migrants,
* treat them as origins, and compute the weighted share of people reporting
* them as previous location who now live in a different unit.
foreach year in $cyrs {
	* Two-digit suffix used in file names
	local y = substr("`year'", 3, .)
	
	* Load Census database (clear is the documented option of -use-)
	use `census_with_rural_`y'', clear
	
	* Identify destination cities (one row per city code)
	keep if migrant_rur_urb == 1
	keep ${geo}
	duplicates drop ${geo}, force
	
	* Now these are going to be our origin cities
	rename ${geo} ${geo}_o
	
	* Identify people whose previous location was one of these cities;
	* keep(match) nogen keeps matched records only and drops _merge
	merge 1:m ${geo}_o using `census_with_rural_`y'', keep(match) nogen
	
	* Expulsion effect: 1 if current unit differs from the origin unit
	gen expulsion = ${geo}_o != ${geo}    
	
	compress
	
	* Collapsing at the city level 
	collapse (mean) expulsion [pw = wtper], by(${geo}_o) fast
	
	rename ${geo}_o ${geo}
	
	* Generate a year variable, for merging, and save 
	gen year = `year'
	
	save "${db_tmp}/${geo}_expulsion_avg_`y'", replace
	
}

/*******************************************************************************
	3.	COMBES ET AL. FIRST STEP
*******************************************************************************/
* Run the first-step script; the path is quoted so that a script directory
* containing spaces is passed to -do- as a single file name
do "${db_scr}/combes_first_step.do"

/*******************************************************************************
	4.	MERGING VARIABLES AND APPENDING YEARS
*******************************************************************************/
/* Merge variables */
* For each census year: start from the labor city averages, add the expulsion
* rate and, for 1991 and 2010 only, the housing averages and the first-step
* city files; save one city-level file per year.
foreach year in $cyrs {
	* Two-digit suffix used in file names
	local y = substr("`year'",3,.)
	
	* clear is the documented option of -use-
	use "${db_tmp}/${geo}_pcity_avg_`y'", clear
	merge 1:1 $geo using "${db_tmp}/${geo}_expulsion_avg_`y'", nogen
	
	* Merging housing averages and cities fixed effects (1991 and 2010 only)
	if "`y'" == "91" | "`y'" == "10" {
		merge 1:1 $geo using "${db_tmp}/${geo}_phous_avg_`y'", nogen
		merge 1:1 $geo using "${db_tmp}/combes_1st_a_l_wage_r_`y'", nogen
		merge 1:1 $geo using "${db_tmp}/combes_1st_a_l_wage_g_`y'", nogen
		merge 1:1 $geo using "${db_tmp}/combes_1st_a_l_wage_r_ind_`y'", nogen
		merge 1:1 $geo using "${db_tmp}/combes_1st_a_l_wage_r_hs_`y'", nogen
		merge 1:1 $geo using "${db_tmp}/combes_1st_a_l_rent_r_`y'", nogen
		merge 1:1 $geo using "${db_tmp}/combes_1st_a_l_rent_g_`y'", nogen
		merge 1:1 $geo using "${db_tmp}/combes_1st_a_l_rent_rr_qual_`y'", nogen
	}
	save "${db_tmp}/${geo}_census_`y'_citypanel", replace
}

/* Append all datasets */
* Stack the yearly city files: start from 1991, then add 2000 and 2010 in order
use "${db_tmp}/${geo}_census_91_citypanel", clear
foreach yy in 00 10 {
	append using "${db_tmp}/${geo}_census_`yy'_citypanel"
}

* Rebuild the state identifier as the city code divided by 100000, rounded down
capture drop state
generate state = floor(mcarp8010 / 100000)

* Put the panel identifiers first
order year ${geo}, first

save "${db_tmp}/${geo}_census_citypanel", replace

* Pause for 100000 milliseconds before continuing
sleep 100000

/*******************************************************************************
	5. SET DATA AS PANEL AND GENERATE DIFFERENCES
*******************************************************************************/
/* Set data as panel */
* Declare the city-by-year panel so the lead operator can be used below
tsset ${geo} year

* Collect every outcome variable to be differenced
quietly ds l_wageres2_* rate_part_* ???_sh* hs_* l_emp* rate_inform* ///
	l_rentres2* l_houses* l_rooms* own* cityfe*
local vars_all "`r(varlist)'"

* Long differences: the 2010 value minus the base-year value, stored on the
* base-year row (a 19-year lead from 1991, a 10-year lead from 2000)
sort ${geo} year
foreach v of varlist `vars_all' {
	generate d_`v'_9110 = F19.`v' - `v' if year == 1991
	label variable d_`v'_9110 "Difference of `v' between 2010 and 1991"

	generate d_`v'_0010 = F10.`v' - `v' if year == 2000
	label variable d_`v'_0010 "Difference of `v' between 2010 and 2000"
}

/*******************************************************************************
	6. 	MERGE THE IMMIGRATION RATES OF EACH MCARP - 1991-2010
*******************************************************************************/
* Bring in the rural immigration measure of each city and give it its period name
merge m:1 ${geo} using "${db_tmp}/immig_ibge", nogenerate keepusing(mig_10_rural)
rename mig_10_rural mig_0110_rural

* Rate on a 0-100 scale: exponentiate the merged variable and multiply by 100
* (new variables are stored as doubles, per the -set type double- at the top)
generate rate_mig_0110_rural = exp(mig_0110_rural) * 100

/*******************************************************************************
	7.	MERGE SHIFT-SHARE INSTRUMENT
*******************************************************************************/
* The shift-share files are keyed on <geo>_destination, so the city identifier
* is renamed for the two merges and restored afterwards
rename ${geo} ${geo}_destination

merge m:1 ${geo}_destination using "${db_tmp}/shift_share", ///
	nogenerate keepusing(shift_0009_dws_t_rur shift_0009_dws_t_rur_abs)

merge m:1 ${geo}_destination using "${db_tmp}/shift_share_drought_controls", ///
	nogenerate keepusing(shift_8190_dws_t_rur)

rename ${geo}_destination ${geo}

* Park the panel in a tempfile; it is reloaded after the population step
tempfile short_dataset
save `short_dataset', replace

/*******************************************************************************
	8.	MERGE POP GROWTH AND INSTRUMENT FOR THE NATIVES POP GROWTH 0010
*******************************************************************************/
* Weighted population of each city in the 1980-2010 censuses, log population
* growth for 1980-91 and 2000-10, merged onto the city panel.
global years "80 91 00 10"

foreach y in $years {
	* unzipfile, replace: overwrite a copy already extracted earlier (section
	* 1.1 leaves Census_91/00/10 on disk) rather than stopping on it;
	* clear is the documented option of -use-
	cd "${census_harm}"
	unzipfile Census_`y', replace
	use wtper muni using "Census_`y'", clear
	merge m:1 muni using "${geocodefile}", ///
	nogen keepusing(mcarp8010) keep(match master)
	
	* Weighted head count per city
	gen pop_all`y' = 1
	collapse (sum) pop_all`y' [pw = wtper], by(mcarp8010)
	
	tempfile pop_all`y'
	save `pop_all`y''
	
	* Remove the extracted census file
	cap rm "Census_`y'.dta"
}

* Put the four census populations side by side
use `pop_all80', clear
merge 1:1 mcarp8010 using `pop_all91', nogen
merge 1:1 mcarp8010 using `pop_all00', nogen
merge 1:1 mcarp8010 using `pop_all10', nogen

* Log population, in the order of $years
foreach y in $years {
	gen l_pop_all`y' = ln(pop_all`y')
}

* Growth as a difference of logs
gen pop_growth8091 = l_pop_all91 - l_pop_all80
gen pop_growth0010 = l_pop_all10 - l_pop_all00

* Drop the rural area code
drop if mcarp8010 == 1000000

keep mcarp8010 pop_growth8091 pop_growth0010 l_pop_all91

save "${db_tmp}/pop_growth_8010", replace

* Reload the city panel and attach the population variables
use `short_dataset', clear
merge m:1 mcarp8010 using "${db_tmp}/pop_growth_8010", nogen

* Pause for 100000 milliseconds before continuing
sleep 100000

/*******************************************************************************
	9.	SAVE AND REMOVE TEMP FILES
*******************************************************************************/
* Write the final analysis dataset
save "${db_tmp}/${geo}_analysis_dataset", replace

* Remove intermediate files; capture ignores files that are already gone
foreach stem in "${geo}_census_citypanel" pop_growth_8010 shift_share ///
	shift_share_drought_controls immig_ibge {
	capture erase "${db_tmp}/`stem'.dta"
}

* Per-year intermediates: city averages, yearly panels and first-step outputs
global years "91 00 10"
foreach y in $years {
	foreach part in pcity_avg phous_avg expulsion_avg {
		capture erase "${db_tmp}/${geo}_`part'_`y'.dta"
	}
	capture erase "${db_tmp}/${geo}_census_`y'_citypanel.dta"
	foreach fe in l_wage_r l_wage_g l_wage_r_ind l_wage_r_hs ///
		l_rent_r l_rent_g l_rent_rr_qual {
		capture erase "${db_tmp}/combes_1st_a_`fe'_`y'.dta"
	}
}


